Language/OS - Multiplatform Resource Library

home *** CD-ROM | disk | FTP | other *** search

/ Language/OS - Multiplatform Resource Library / LANGUAGE OS.iso / cpp_libs / perlcl16.lha / perlclass1.6 / perlclass.c++ < prev next >

Wrap

C/C++ Source or Header | 1992-11-10 | 13KB | 560 lines

/* * Version 1.6 * Written by Jim Morris, jegm@sgi.com * Kudos to Larry Wall for inventing Perl * Copyrights only exist on the regex stuff, and all * have been left intact. * The only thing I ask is that you let me know of any nifty fixes or * additions. * Credits: * I'd like to thank Michael Golan <mg@Princeton.EDU> for his critiques * and clever suggestions. Some of which have actually been implemented */ #include <iostream.h> #include <string.h> #include <malloc.h> #include <stdio.h> #ifdef __TURBOC__ #pragma hdrstop #endif #include "perlclass.h" // VarString Implementation VarString& VarString::operator=(const char *s) { int nl= strlen(s); if(nl+1 >= allocated) grow((nl-allocated)+allocinc); assert(allocated > nl+1); strcpy(a, s); len= nl; return *this; } VarString& VarString::operator=(const VarString& n) { if(this != &n){ if(n.len+1 >= allocated){ // if it is not big enough # ifdef DEBUG fprintf(stderr, "~operator=(VarString&) a= %p\n", a); # endif delete [] a; // get rid of old one allocated= n.allocated; allocinc= n.allocinc; a= new char[allocated]; # ifdef DEBUG fprintf(stderr, "operator=(VarString&) a= %p, source= %p\n", a, n.a); # endif } len= n.len; strcpy(a, n.a); } return *this; } void VarString::grow(int n) { if(n == 0) n= allocinc; allocated += n; char *tmp= new char[allocated]; strcpy(tmp, a); #ifdef DEBUG fprintf(stderr, "VarString::grow() a= %p, old= %p, allocinc= %d\n", tmp, a, allocinc); fprintf(stderr, "~VarString::grow() a= %p\n", a); #endif delete [] a; a= tmp; } void VarString::add(char c) { if(len+1 >= allocated) grow(); assert(allocated > len+1); a[len++]= c; a[len]= '\0'; } void VarString::add(const char *s) { int nl= strlen(s); if(len+nl >= allocated) grow(((len+nl)-allocated)+allocinc); assert(allocated > len+nl); strcat(a, s); len+=nl; } void VarString::add(int ip, const char *s) { int nl= strlen(s); if(len+nl >= allocated) grow(((len+nl)-allocated)+allocinc); assert(allocated > len+nl); memmove(&a[ip+nl], &a[ip], (len-ip)+1); // shuffle up memcpy(&a[ip], s, nl); len+=nl; assert(a[len] == '\0'); } void VarString::remove(int ip, int n) { assert(ip+n <= len); memmove(&a[ip], &a[ip+n], len-ip); // shuffle down len-=n; assert(a[len] == '\0'); } // // PerlString stuff // // assignments PerlString& PerlString::operator=(const PerlString& n) { if(this == &n) return *this; pstr= n.pstr; return *this; } PerlString& PerlString::operator=(const substring& sb) { VarString tmp(sb.pt, sb.len); pstr= tmp; return *this; } // concatenations PerlString PerlString::operator+(const PerlString& s) const { PerlString ts(*this); ts.pstr.add(s); return ts; } PerlString PerlString::operator+(const char *s) const { PerlString ts(*this); ts.pstr.add(s); return ts; } PerlString PerlString::operator+(char c) const { PerlString ts(*this); ts.pstr.add(c); return ts; } PerlString operator+(const char *s1, const PerlString& s2) { PerlString ts(s1); ts = ts + s2; // cout << "s2[0] = " << s2[0] << endl; // gives incorrect error return ts; } // other stuff char PerlString::chop(void) { int n= length(); if(n <= 0) return '\0'; // empty char tmp= pstr[n-1]; pstr.remove(n-1); return tmp; } int PerlString::index(const PerlString& s, int offset) { for(int i=offset;i<length();i++){ if(strncmp(&pstr[i], s, s.length()) == 0) return i; } return -1; } int PerlString::rindex(const PerlString& s, int offset) { if(offset == -1) offset= length()-s.length(); else offset= offset-s.length()+1; if(offset > length()-s.length()) offset= length()-s.length(); for(int i=offset;i>=0;i--){ if(strncmp(&pstr[i], s, s.length()) == 0) return i; } return -1; } PerlString::substring PerlString::substr(int offset, int len) { if(len == -1) len= length() - offset; // default use rest of string if(offset < 0){ offset= length() + offset; // count from end of string if(offset < 0) offset= 0; // went too far, adjust to start } return substring(*this, offset, len); } // this is private // it shrinks or expands string as required void PerlString::insert(int pos, int len, const char *s, int nlen) { if(pos < length()){ // nothing to delete if not true if((len+pos) > length()) len= length() - pos; pstr.remove(pos, len); // first remove subrange }else pos= length(); VarString tmp(s, nlen); pstr.add(pos, tmp); // then insert new substring } int PerlString::m(Regexp& r) { return r.match(*this); } int PerlString::m(const char *pat, const char *opts) { int iflg= strchr(opts, 'i') != NULL; Regexp r(pat, iflg?Regexp::nocase:0); return m(r); } int PerlString::m(Regexp& r, PerlStringList& psl) { if(!r.match(*this)) return 0; psl.reset(); // clear it first Range rng; for (int i=0; i<r.groups(); i++){ rng= r.getgroup(i); psl.push(substr(rng.start(), rng.length())); } return r.groups(); } int PerlString::m(const char *pat, PerlStringList& psl, const char *opts) { int iflg= strchr(opts, 'i') != NULL; Regexp r(pat, iflg?Regexp::nocase:0); return m(r, psl); } // // I know! This is not fast, but it works!! // int PerlString::tr(const char *sl, const char *rl, const char *opts) { if(length() == 0 || strlen(sl) == 0) return 0; int cflg= strchr(opts, 'c') != NULL; // thanks Michael int dflg= strchr(opts, 'd') != NULL; int sflg= strchr(opts, 's') != NULL; int cnt= 0, flen= 0; PerlString t; unsigned char lstc= '\0', fr[256]; // build search array, which is a 256 byte array that stores the index+1 // in the search string for each character found, == 0 if not in search memset(fr, 0, 256); for(int i=0;i<strlen(sl);i++){ if(i && sl[i] == '-'){ // got a range assert(i+1 < strlen(sl) && lstc <= sl[i+1]); // sanity check for(unsigned char c=lstc+1;c<=sl[i+1];c++){ fr[c]= ++flen; } i++; lstc= '\0'; }else{ lstc= sl[i]; fr[sl[i]]= ++flen; } } int rlen; // build replacement list if((rlen=strlen(rl)) != 0){ for(i=0;i<rlen;i++){ if(i && rl[i] == '-'){ // got a range assert(i+1 < rlen && t[t.length()-1] <= rl[i+1]); // sanity check for(char c=t[i-1]+1;c<=rl[i+1];c++) t += c; i++; }else t += rl[i]; } } // replacement string that is shorter uses last character for rest of string // unless the delete option is in effect or it is empty while(!dflg && rlen && flen > t.length()){ t += t[t.length()-1]; // duplicate last character } rlen= t.length(); // length of translation string // do translation, and deletion if dflg (actually falls out of length of t) // also squeeze translated characters if sflg PerlString tmp; // need this in case dflg, and string changes size for(i=0;i<length();i++){ int off; if(cflg){ // complement, ie if NOT in f char rc= !dflg ? t[t.length()-1] : '\0'; // always use last character for replacement if((off=fr[(*this)[i]]) == 0){ // not in map cnt++; if(!dflg && (!sflg || tmp.length() == 0 || tmp[tmp.length()-1] != rc)) tmp += rc; }else tmp += (*this)[i]; // just stays the same }else{ // in fr so substitute with t, if no equiv in t then delete if((off=fr[(*this)[i]]) > 0){ off--; cnt++; if(rlen==0 && !dflg && (!sflg || tmp.length() == 0 || tmp[tmp.length()-1] != (*this)[i])) tmp += (*this)[i]; // stays the same else if(off < rlen && (!sflg || tmp.length() == 0 || tmp[tmp.length()-1] != t[off])) tmp += t[off]; // substitute }else tmp += (*this)[i]; // just stays the same } } *this= tmp; return cnt; } int PerlString::s(const char *exp, const char *repl, const char *opts) { int gflg= strchr(opts, 'g') != NULL; int iflg= strchr(opts, 'i') != NULL; int cnt= 0; Regexp re(exp, iflg?Regexp::nocase:0); Range rg; if(re.match(*this)){ // OK I know, this is a horrible hack, but it seems to work if(gflg){ // recursively call s() until applied to whole string rg= re.getgroup(0); if(rg.end()+1 < length()){ PerlString st(substr(rg.end()+1)); // cout << "Substring: " << st << endl; cnt += st.s(exp, repl, opts); substr(rg.end()+1)= st; // cout << "NewString: " << *this << endl; } } if(!strchr(repl, '$')){ // straight, simple substitution rg= re.getgroup(0); substr(rg.start(), rg.length())= repl; cnt++; }else{ // need to do subexpression substitution char c; const char *src= repl; PerlString dst; int no; while ((c = *src++) != '\0') { if(c == '$' && *src == '&'){ no = 0; src++; }else if(c == '$' && '0' <= *src && *src <= '9') no = *src++ - '0'; else no = -1; if(no < 0){ /* Ordinary character. */ if(c == '\\' && (*src == '\\' || *src == '$')) c = *src++; dst += c; }else{ rg= re.getgroup(no); dst += substr(rg.start(), rg.length()); } } rg= re.getgroup(0); substr(rg.start(), rg.length())= dst; cnt++; } return cnt; } return cnt; } PerlStringList PerlString::split(const char *pat, int limit) { PerlStringList l; l.split(*this, pat, limit); return l; } // // PerlStringList stuff // int PerlStringList::split(const char *str, const char *pat, int limit) { Regexp re(pat); Range rng; PerlString s(str); int cnt= 1; if(*pat == '\0'){ // special empty string case splits entire thing while(*str){ s= *str++; push(s); } return count(); } if(strcmp(pat, "' '") == 0){ // special awk case char *p, *ws= " \t\n"; TempString t(str); // can't hack users data p= strtok(t, ws); while(p){ push(p); p= strtok(NULL, ws); } return count(); } while(re.match(s) && (limit < 0 || cnt < limit)){ // find separator rng= re.getgroup(0); // full matched string (entire separator) push(s.substr(0, rng.start())); for(int i=1;i<re.groups();i++){ push(s.substr(re.getgroup(i))); // add subexpression matches } s= s.substr(rng.end()+1); cnt++; } if(s.length()) push(s); if(limit < 0){ // strip trailing null entries int off= count()-1; while(off >= 0 && (*this)[off].length() == 0){ off--; } splice(off+1); } return count(); } PerlString PerlStringList::join(const char *pat) { PerlString ts; for(int i=0;i<count();i++){ ts += (*this)[i]; if(i<count()-1) ts += pat; } return ts; } PerlStringList::PerlStringList(const PerlStringList& n) { for(int i=0;i<n.count();i++){ push(n[i]); } } PerlStringList& PerlStringList::operator=(const PerlList<PerlString>& n) { if(this == &n) return *this; // erase old one reset(); for(int i=0;i<n.count();i++){ push(n[i]); } return *this; } int PerlStringList::m(const char *rege, const char *targ, const char *opts) { int iflg= strchr(opts, 'i') != NULL; Regexp r(rege, iflg?Regexp::nocase:0); if(!r.match(targ)) return 0; Range rng; for (int i=0; i<r.groups(); i++){ rng= r.getgroup(i); push(PerlString(targ).substr(rng.start(), rng.length())); } return r.groups(); } PerlStringList PerlStringList::grep(const char *rege, const char *opts) { PerlStringList rt; int iflg= strchr(opts, 'i') != NULL; Regexp rexp(rege, iflg?Regexp::nocase:0); // compile once for(int i=0;i<count();i++){ if(rexp.match((*this)[i])){ rt.push((*this)[i]); } } return rt; } // streams stuff istream& operator>>(istream& ifs, PerlString& s) { char c; #if 0 char buf[40]; #else char buf[132]; #endif s= ""; // empty string ifs.get(buf, sizeof buf); // This is tricky because a line teminated by end of file that is not terminated // with a '\n' first is considered an OK line, but ifs.good() will fail. // This will correctly return the last line if it is terminated by eof with the // stream still in a non-fail condition, but at eof, so next call will fail as // expected if(ifs){ // previous operation was ok s += buf; // append buffer to string // cout << "<" << buf << ">" << endl; // if its a long line continue appending to string while(ifs.good() && (c=ifs.get()) != '\n'){ // cout << "eof1= " << ifs.eof() << endl; ifs.putback(c); // cout << "eof2= " << ifs.eof() << endl; if(ifs.get(buf, sizeof buf)) s += buf; // append to line } } return ifs; } istream& operator>>(istream& ifs, PerlStringList& sl) { PerlString s; // Should I reset sl first? sl.reset(); // I think so, to be consistent while(ifs >> s){ sl.push(s); // cout << "<" << s << ">" << endl; }; return ifs; } ostream& operator<<(ostream& os, const PerlString& arr) { #ifdef TEST os << "(" << arr.length() << ")" << "\""; os << (const char *)arr; os << "\""; #else os << (const char *)arr; #endif return os; } ostream& operator<<(ostream& os, const PerlStringList& arr) { for(int i=0;i<arr.count();i++) #ifdef TEST os << "[" << i << "]" << arr[i] << endl; #else os << arr[i] << endl; #endif return os; }